This script is used for summarizing IGC Simulation result (with tract length)

Read in data first

rm(list=ls())  # clean up workspace
IGC.path <- "/Users/xji3/FromCluster_IGCSimulation01252016/"
paml.path <- "/Users/xji3/GitFolders/IGCSimulation/"
IGC.geo.list <- c(1.0, 3.0, 10.0, 50.0, 100.0, 500.0)
#IGC.geo.list <- c(1.0)
num.sim = 100
paralog = "YDR418W_YEL054C"

# Now read in PAML analysis of these simulated data set
data.path <- paste(paralog, "",sep = "/")
for (IGC.geo in IGC.geo.list){
  summary_mat <- NULL
  IGC.geo.path <- paste("IGCgeo_", toString(IGC.geo), ".0/", sep = "")
  file.name <- paste("geo", paste(toString(IGC.geo), ".0", sep = ""), "paml", "summary.txt", sep = "_")
  for (sim.num in 0:(num.sim - 1)){
    summary_file <- paste(paml.path, file.name, sep = "")
    if (file.exists(summary_file)){
      all <- readLines(summary_file, n = -1)
      col.names <- strsplit(all[1], ' ')[[1]][-1]
      row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
      summary_mat <- as.matrix(read.table(summary_file, 
                                          row.names = row.names, 
                                          col.names = col.names))
      
      }
    }
  assign(paste("PAML", paste(toString(IGC.geo), ".0", sep = ""), "summary", sep = "_"), summary_mat)
  }

## Now read simulation result

sim.path <- paste("SimulationSummary", paralog, "",sep = "/")
for (IGC.geo in IGC.geo.list){
  summary_mat <- NULL
  IGC.geo.path <- paste("IGCgeo_", toString(IGC.geo), ".0/", sep = "")
  file.prefix <- paste(paralog, "MG94_geo", paste(toString(IGC.geo), ".0", sep = ""), "Sim", sep = "_")
  file.suffix <- "summary.txt"
  for (sim.num in 0:(num.sim - 1)){
    file.name <- paste(file.prefix, toString(sim.num), file.suffix, sep = "_")
    summary_file <- paste(IGC.path, sim.path, IGC.geo.path, file.name, sep = "")
    if (file.exists(summary_file)){
      all <- readLines(summary_file, n = -1)
      col.names <- paste("geo", paste(toString(IGC.geo), ".0", sep = ""), "sim", toString(sim.num), sep = "_")
      row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
      summary_mat <- cbind(summary_mat, as.matrix(read.table(summary_file, 
                                                             row.names = row.names, 
                                                             col.names = col.names)))        
      }
    }
  assign(paste("geo", paste(toString(IGC.geo), ".0", sep = ""), "summary", sep = "_"), summary_mat)
  }

# Now read in 'real'  % changes due to IGC in the simulation
data.path <- paste(paralog, "",sep = "/")
for (IGC.geo in IGC.geo.list){
  summary_mat <- NULL
  IGC.geo.path <- paste("IGCgeo_", toString(IGC.geo), ".0/", sep = "")
  file.prefix <- paste(paralog, "MG94_geo", paste(toString(IGC.geo), ".0", sep = ""), "Sim", sep = "_")
  file.suffix <- "short.log"
  for (sim.num in 0:(num.sim - 1)){
    file.name <- paste(file.prefix, toString(sim.num), file.suffix, sep = "_")
    file.name <- paste("sim", paste(toString(sim.num), "/", file.prefix, sep = ""),
                       toString(sim.num), file.suffix, sep = "_")
    summary_file <- paste(IGC.path, data.path, IGC.geo.path, file.name, sep = "")
    if (file.exists(summary_file)){
      all <- readLines(summary_file, n = -1)
      col.names <- paste("geo", paste(toString(IGC.geo), ".0", sep = ""), "sim", toString(sim.num), sep = "_")
      row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
      summary_mat <- cbind(summary_mat, as.matrix(read.table(summary_file, 
                                                             row.names = row.names, 
                                                             col.names = col.names)))        
      }
    }
  assign(paste("pIGC", paste(toString(IGC.geo), ".0", sep = ""), "summary", sep = "_"), summary_mat)
  }

Now analyze the simulation results

parameters used in generating datasets are:

pi_a = 0.2983654, pi_c = 0.2095729, pi_g = 0.1871536, pi_t = 0.3049081

kappa = 8.4043336

omega = 1.0

tau = 1.409408 (0.42)

Tau estimate summary

# geo_1.0
summary(geo_1.0_summary["tau", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.726   1.060   1.180   1.240   1.390   1.970
sd(geo_1.0_summary["tau", ])
## [1] 0.2652
# geo_3.0
summary(geo_3.0_summary["tau", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.771   1.050   1.320   1.310   1.510   2.300
sd(geo_3.0_summary["tau", ])
## [1] 0.2959
# geo_10.0
summary(geo_10.0_summary["tau", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.588   1.130   1.430   1.400   1.670   2.260
sd(geo_10.0_summary["tau", ])
## [1] 0.3729
# geo_50.0
summary(geo_50.0_summary["tau", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.534   1.070   1.400   1.480   1.870   2.750
sd(geo_50.0_summary["tau", ])
## [1] 0.5685
# geo_100.0
summary(geo_100.0_summary["tau", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.115   0.922   1.340   1.550   2.130   5.370
sd(geo_100.0_summary["tau", ])
## [1] 0.8928
# geo_500.0
summary(geo_500.0_summary["tau", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.39    1.84    1.92    3.01    8.24
sd(geo_500.0_summary["tau", ])
## [1] 1.68
sd.list <- c(sd(geo_1.0_summary["tau", ]), sd(geo_3.0_summary["tau", ]), 
             sd(geo_10.0_summary["tau", ]),sd(geo_50.0_summary["tau", ]),
             sd(geo_100.0_summary["tau", ]), sd(geo_500.0_summary["tau", ]))
plot(IGC.geo.list, sd.list)
abline(h = 0.42)

plot of chunk unnamed-chunk-2

mean.list <- c(mean(geo_1.0_summary["tau", ]), mean(geo_1.0_summary["tau", ]),
               mean(geo_10.0_summary["tau", ]), mean(geo_50.0_summary["tau", ]), 
               mean(geo_100.0_summary["tau", ]), mean(geo_500.0_summary["tau", ]))
plot(IGC.geo.list, mean.list)
abline(h = 1.409408)

plot of chunk unnamed-chunk-2

Now summarized % changes due to IGC in simulation estimates

It was estimated 0.2131 (0.02884)

percent.IGC.geo.1.0 <- colSums(geo_1.0_summary[34:45, ] + geo_1.0_summary[46:57, ]) / (colSums(geo_1.0_summary[34:45, ] + geo_1.0_summary[46:57, ] + geo_1.0_summary[58:69, ]))
summary(percent.IGC.geo.1.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.127   0.175   0.195   0.192   0.208   0.243
sd(percent.IGC.geo.1.0)
## [1] 0.02352
percent.IGC.geo.3.0 <- colSums(geo_3.0_summary[34:45, ] + geo_3.0_summary[46:57, ]) / (colSums(geo_3.0_summary[34:45, ] + geo_3.0_summary[46:57, ] + geo_3.0_summary[58:69, ]))
summary(percent.IGC.geo.3.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.127   0.190   0.211   0.206   0.226   0.273
sd(percent.IGC.geo.3.0)
## [1] 0.02683
percent.IGC.geo.10.0 <- colSums(geo_10.0_summary[34:45, ] + geo_10.0_summary[46:57, ]) / (colSums(geo_10.0_summary[34:45, ] + geo_10.0_summary[46:57, ] + geo_10.0_summary[58:69, ]))
summary(percent.IGC.geo.10.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.119   0.189   0.211   0.210   0.231   0.273
sd(percent.IGC.geo.10.0)
## [1] 0.03256
percent.IGC.geo.50.0 <- colSums(geo_50.0_summary[34:45, ] + geo_50.0_summary[46:57, ]) / (colSums(geo_50.0_summary[34:45, ] + geo_50.0_summary[46:57, ] + geo_50.0_summary[58:69, ]))
summary(percent.IGC.geo.50.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0918  0.1780  0.2170  0.2180  0.2600  0.3670
sd(percent.IGC.geo.50.0)
## [1] 0.05877
percent.IGC.geo.100.0 <- colSums(geo_100.0_summary[34:45, ] + geo_100.0_summary[46:57, ]) / (colSums(geo_100.0_summary[34:45, ] + geo_100.0_summary[46:57, ] + geo_100.0_summary[58:69, ]))
summary(percent.IGC.geo.100.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0217  0.1570  0.2150  0.2100  0.2640  0.3870
sd(percent.IGC.geo.100.0)
## [1] 0.07892
percent.IGC.geo.500.0 <- colSums(geo_500.0_summary[34:45, ] + geo_500.0_summary[46:57, ]) / (colSums(geo_500.0_summary[34:45, ] + geo_500.0_summary[46:57, ] + geo_500.0_summary[58:69, ]))
summary(percent.IGC.geo.500.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0767  0.2390  0.2100  0.3190  0.4590
sd(percent.IGC.geo.500.0)
## [1] 0.1381
# plot mean % changes due to IGC v.s. IGC_geo
mean.percent.IGC.list <- c(mean(percent.IGC.geo.1.0), mean(percent.IGC.geo.3.0), 
                           mean(percent.IGC.geo.10.0), mean(percent.IGC.geo.50.0), 
                           mean(percent.IGC.geo.100.0), mean(percent.IGC.geo.500.0))
plot(IGC.geo.list, mean.percent.IGC.list)
abline(h = 0.2131)

plot of chunk unnamed-chunk-3

sd.percent.IGC.list <- c(sd(percent.IGC.geo.1.0), sd(percent.IGC.geo.3.0), 
                         sd(percent.IGC.geo.10.0), sd(percent.IGC.geo.50.0), 
                         sd(percent.IGC.geo.100.0), sd(percent.IGC.geo.500.0))

plot(IGC.geo.list, sd.percent.IGC.list)
abline(h = 0.02884)

plot of chunk unnamed-chunk-3

Now get histograms of tau estimate

# geo_1.0
hist(geo_1.0_summary["tau",])

plot of chunk unnamed-chunk-4

# geo_3.0
hist(geo_3.0_summary["tau",])

plot of chunk unnamed-chunk-4

# geo_10.0
hist(geo_10.0_summary["tau",])

plot of chunk unnamed-chunk-4

# geo_50.0
hist(geo_50.0_summary["tau",])

plot of chunk unnamed-chunk-4

# geo_100.0
hist(geo_100.0_summary["tau",])

plot of chunk unnamed-chunk-4

# geo_500.0
hist(geo_500.0_summary["tau",])

plot of chunk unnamed-chunk-4

Now get histograms of % change from IGC from estimate

# geo_1.0
hist(percent.IGC.geo.1.0)

plot of chunk unnamed-chunk-5

# geo_3.0
hist(percent.IGC.geo.3.0)

plot of chunk unnamed-chunk-5

# geo_10.0
hist(percent.IGC.geo.10.0)

plot of chunk unnamed-chunk-5

# geo_50.0
hist(percent.IGC.geo.50.0)

plot of chunk unnamed-chunk-5

# geo_100.0
hist(percent.IGC.geo.100.0)

plot of chunk unnamed-chunk-5

# geo_500.0
hist(percent.IGC.geo.500.0)

plot of chunk unnamed-chunk-5

Now summarize real % change due to IGC in simulation

It was estimated 0.2131 (0.02884) in real data set

pIGC.real.geo.1.0 <- pIGC_1.0_summary[1, ]
summary(pIGC.real.geo.1.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.163   0.203   0.212   0.214   0.227   0.268
sd(pIGC.real.geo.1.0)
## [1] 0.01825
pIGC.real.geo.3.0 <- pIGC_3.0_summary[1, ]
summary(pIGC.real.geo.3.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.170   0.201   0.215   0.216   0.227   0.272
sd(pIGC.real.geo.3.0)
## [1] 0.02085
pIGC.real.geo.10.0 <- pIGC_10.0_summary[1, ]
summary(pIGC.real.geo.10.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.146   0.190   0.211   0.211   0.233   0.268
sd(pIGC.real.geo.10.0)
## [1] 0.02702
pIGC.real.geo.50.0 <- pIGC_50.0_summary[1, ]
summary(pIGC.real.geo.50.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0984  0.1800  0.2160  0.2170  0.2520  0.3670
sd(pIGC.real.geo.50.0)
## [1] 0.05142
pIGC.real.geo.100.0 <- pIGC_100.0_summary[1, ]
summary(pIGC.real.geo.100.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.035   0.169   0.207   0.205   0.244   0.351
sd(pIGC.real.geo.100.0)
## [1] 0.06616
pIGC.real.geo.500.0 <- pIGC_500.0_summary[1, ]
summary(pIGC.real.geo.500.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0987  0.2190  0.1960  0.2780  0.4090
sd(pIGC.real.geo.500.0)
## [1] 0.1144
# plot mean % changes due to IGC v.s. IGC_geo
mean.pIGC.real.list <- c(mean(pIGC.real.geo.1.0), mean(pIGC.real.geo.3.0), 
                         mean(pIGC.real.geo.10.0), mean(pIGC.real.geo.50.0), 
                         mean(pIGC.real.geo.100.0), mean(pIGC.real.geo.500.0))
plot(IGC.geo.list, mean.pIGC.real.list)
abline(h = 0.2131)

plot of chunk unnamed-chunk-6

sd.pIGC.real.list <- c(sd(pIGC.real.geo.1.0), sd(pIGC.real.geo.3.0), 
                       sd(pIGC.real.geo.10.0), sd(pIGC.real.geo.50.0), 
                       sd(pIGC.real.geo.100.0), sd(pIGC.real.geo.500.0))

plot(IGC.geo.list, sd.pIGC.real.list)
abline(h = 0.02884)

plot of chunk unnamed-chunk-6

Now get histograms of real % change due to IGC from simulation

# geo_1.0
hist(pIGC.real.geo.1.0)

plot of chunk unnamed-chunk-7

# geo_3.0
hist(pIGC.real.geo.3.0)

plot of chunk unnamed-chunk-7

# geo_10.0
hist(pIGC.real.geo.10.0)

plot of chunk unnamed-chunk-7

# geo_50.0
hist(pIGC.real.geo.50.0)

plot of chunk unnamed-chunk-7

# geo_100.0
hist(pIGC.real.geo.100.0)

plot of chunk unnamed-chunk-7

# geo_500.0
hist(pIGC.real.geo.500.0)

plot of chunk unnamed-chunk-7

Now summarize difference between real % change due to IGC with estimated %

# geo_1.0
diff.pIGC.geo.1.0 <- percent.IGC.geo.1.0 - pIGC.real.geo.1.0
summary(diff.pIGC.geo.1.0)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.08770 -0.03200 -0.02290 -0.02200 -0.00705  0.03080
sd(diff.pIGC.geo.1.0)
## [1] 0.02085
# geo_3.0
diff.pIGC.geo.3.0 <- percent.IGC.geo.3.0 - pIGC.real.geo.3.0
summary(diff.pIGC.geo.3.0)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.06280 -0.02190 -0.01060 -0.01020  0.00473  0.02960
sd(diff.pIGC.geo.3.0)
## [1] 0.01954
# geo_10.0
diff.pIGC.geo.10.0 <- percent.IGC.geo.10.0 - pIGC.real.geo.10.0
summary(diff.pIGC.geo.10.0)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.05970 -0.01490 -0.00248 -0.00161  0.01320  0.05940
sd(diff.pIGC.geo.10.0)
## [1] 0.02033
# geo_50.0
diff.pIGC.geo.50.0 <- percent.IGC.geo.50.0 - pIGC.real.geo.50.0
summary(diff.pIGC.geo.50.0)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.05210 -0.01900  0.00065  0.00021  0.01820  0.06290
sd(diff.pIGC.geo.50.0)
## [1] 0.025
# geo_100.0
diff.pIGC.geo.100.0 <- percent.IGC.geo.100.0 - pIGC.real.geo.100.0
summary(diff.pIGC.geo.100.0)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.06750 -0.01260  0.00435  0.00487  0.02260  0.07110
sd(diff.pIGC.geo.100.0)
## [1] 0.02836
# geo_500.0
diff.pIGC.geo.500.0 <- percent.IGC.geo.500.0 - pIGC.real.geo.500.0
summary(diff.pIGC.geo.500.0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.0670 -0.0167  0.0157  0.0136  0.0374  0.1220
sd(diff.pIGC.geo.500.0)
## [1] 0.04055
# plot mean % changes due to IGC v.s. IGC_geo
mean.diff.pIGC.list <- c(mean(diff.pIGC.geo.1.0), mean(diff.pIGC.geo.3.0), 
                         mean(diff.pIGC.geo.10.0), mean(diff.pIGC.geo.50.0), 
                         mean(diff.pIGC.geo.100.0), mean(diff.pIGC.geo.500.0))
plot(IGC.geo.list, mean.diff.pIGC.list)
abline(h = 0.0)

plot of chunk unnamed-chunk-8

sd.diff.pIGC.list <- c(sd(diff.pIGC.geo.1.0), sd(diff.pIGC.geo.3.0), 
                       sd(diff.pIGC.geo.10.0), sd(diff.pIGC.geo.50.0), 
                       sd(diff.pIGC.geo.100.0), sd(diff.pIGC.geo.500.0))

plot(IGC.geo.list, sd.diff.pIGC.list)
abline(h = 0.02884)

plot of chunk unnamed-chunk-8

Now get histograms of difference of % IGC from ‘real’ and estimate

# geo_1.0
hist(diff.pIGC.geo.1.0)

plot of chunk unnamed-chunk-9

# geo_3.0
hist(diff.pIGC.geo.3.0)

plot of chunk unnamed-chunk-9

# geo_10.0
hist(diff.pIGC.geo.10.0)

plot of chunk unnamed-chunk-9

# geo_50.0
hist(diff.pIGC.geo.50.0)

plot of chunk unnamed-chunk-9

# geo_100.0
hist(diff.pIGC.geo.100.0)

plot of chunk unnamed-chunk-9

# geo_500.0
hist(diff.pIGC.geo.500.0)

plot of chunk unnamed-chunk-9

update Jan 29th, 2016

Now added PAML estimate of all simulated datasets. Start to show more comparisons.

Omega estimate from PAML

# geo_1.0
summary(PAML_1.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.641   0.907   0.985   0.981   1.070   1.320
sd(PAML_1.0_summary["omega", ])
## [1] 0.1416
# geo_3.0
summary(PAML_3.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.672   0.915   1.000   1.030   1.140   1.510
sd(PAML_3.0_summary["omega", ])
## [1] 0.1688
# geo_10.0
summary(PAML_10.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.685   0.897   1.000   1.010   1.100   1.460
sd(PAML_10.0_summary["omega", ])
## [1] 0.1639
# geo_50.0
summary(PAML_50.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.774   0.945   1.040   1.070   1.160   1.590
sd(PAML_50.0_summary["omega", ])
## [1] 0.1669
# geo_100.0
summary(PAML_100.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.757   0.914   1.010   1.040   1.140   1.620
sd(PAML_100.0_summary["omega", ])
## [1] 0.1663
# geo_500.0
summary(PAML_500.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.654   0.911   0.996   1.010   1.090   1.700
sd(PAML_500.0_summary["omega", ])
## [1] 0.1668
PAML.omega.mean <- c(mean(PAML_1.0_summary["omega", ]), mean(PAML_3.0_summary["omega", ]), mean(PAML_10.0_summary["omega", ]),
                     mean(PAML_50.0_summary["omega", ]), mean(PAML_100.0_summary["omega", ]), 
                     mean(PAML_500.0_summary["omega", ]))
PAML.omega.sd <- c(sd(PAML_1.0_summary["omega", ]), sd(PAML_3.0_summary["omega", ]), sd(PAML_10.0_summary["omega", ]),
                   sd(PAML_50.0_summary["omega", ]), sd(PAML_100.0_summary["omega", ]), 
                   sd(PAML_500.0_summary["omega", ]))

plot(IGC.geo.list, PAML.omega.mean)
abline(h = 1.0)

plot of chunk unnamed-chunk-10

plot(IGC.geo.list, PAML.omega.sd)

plot of chunk unnamed-chunk-10

Omega estimate from IGC + MG94

# geo_1.0
summary(geo_1.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.638   0.890   0.976   0.969   1.050   1.250
sd(geo_1.0_summary["omega", ])
## [1] 0.1266
# geo_3.0
summary(geo_3.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.726   0.909   0.989   1.020   1.090   1.400
sd(geo_3.0_summary["omega", ])
## [1] 0.1478
# geo_10.0
summary(geo_10.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.684   0.905   1.000   1.010   1.090   1.360
sd(geo_10.0_summary["omega", ])
## [1] 0.1408
# geo_50.0
summary(geo_50.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.804   0.963   1.040   1.070   1.130   1.550
sd(geo_50.0_summary["omega", ])
## [1] 0.151
# geo_100.0
summary(geo_100.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.739   0.907   1.030   1.030   1.130   1.470
sd(geo_100.0_summary["omega", ])
## [1] 0.1498
# geo_500.0
summary(geo_500.0_summary["omega", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.726   0.893   0.989   1.010   1.090   1.510
sd(geo_500.0_summary["omega", ])
## [1] 0.143
geo.omega.mean <- c(mean(geo_1.0_summary["omega", ]), mean(geo_3.0_summary["omega", ]), mean(geo_10.0_summary["omega", ]),
                    mean(geo_50.0_summary["omega", ]), mean(geo_100.0_summary["omega", ]), 
                    mean(geo_500.0_summary["omega", ]))
geo.omega.sd <- c(sd(geo_1.0_summary["omega", ]), sd(geo_3.0_summary["omega", ]), sd(geo_10.0_summary["omega", ]),
                  sd(geo_50.0_summary["omega", ]), sd(geo_100.0_summary["omega", ]), 
                  sd(geo_500.0_summary["omega", ]))

plot(IGC.geo.list, geo.omega.mean)
abline(h = 1.0)

plot of chunk unnamed-chunk-11

plot(IGC.geo.list, geo.omega.sd)

plot of chunk unnamed-chunk-11

kappa estimate from PAML

# geo_1.0
summary(PAML_1.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.25    7.27    8.41    8.35    9.28   12.70
sd(PAML_1.0_summary["kappa", ])
## [1] 1.468
# geo_3.0
summary(PAML_3.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.03    7.16    8.33    8.40    9.11   14.70
sd(PAML_3.0_summary["kappa", ])
## [1] 1.538
# geo_10.0
summary(PAML_10.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.40    7.48    8.28    8.38    9.14   14.00
sd(PAML_10.0_summary["kappa", ])
## [1] 1.534
# geo_50.0
summary(PAML_50.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.14    7.51    8.37    8.65    9.72   15.60
sd(PAML_50.0_summary["kappa", ])
## [1] 1.719
# geo_100.0
summary(PAML_100.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.81    7.38    8.38    8.53    9.43   12.60
sd(PAML_100.0_summary["kappa", ])
## [1] 1.462
# geo_500.0
summary(PAML_500.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.78    7.56    8.21    8.33    9.20   12.00
sd(PAML_500.0_summary["kappa", ])
## [1] 1.283
PAML.kappa.mean <- c(mean(PAML_1.0_summary["kappa", ]), mean(PAML_3.0_summary["kappa", ]), mean(PAML_10.0_summary["kappa", ]),
                     mean(PAML_50.0_summary["kappa", ]), mean(PAML_100.0_summary["kappa", ]), 
                     mean(PAML_500.0_summary["kappa", ]))
PAML.kappa.sd <- c(sd(PAML_1.0_summary["kappa", ]), sd(PAML_3.0_summary["kappa", ]), sd(PAML_10.0_summary["kappa", ]),
                   sd(PAML_50.0_summary["kappa", ]), sd(PAML_100.0_summary["kappa", ]), 
                   sd(PAML_500.0_summary["kappa", ]))

plot(IGC.geo.list, PAML.kappa.mean)
abline(h = 8.4043336)

plot of chunk unnamed-chunk-12

plot(IGC.geo.list, PAML.kappa.sd)

plot of chunk unnamed-chunk-12

kappa estimate from IGC + MG94

# geo_1.0
summary(geo_1.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.18    7.64    8.59    8.55    9.50   12.20
sd(geo_1.0_summary["kappa", ])
## [1] 1.434
# geo_3.0
summary(geo_3.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.14    7.52    8.49    8.63    9.24   13.80
sd(geo_3.0_summary["kappa", ])
## [1] 1.486
# geo_10.0
summary(geo_10.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.54    7.65    8.38    8.54    9.22   14.10
sd(geo_10.0_summary["kappa", ])
## [1] 1.506
# geo_50.0
summary(geo_50.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.73    7.69    8.61    8.71    9.60   13.20
sd(geo_50.0_summary["kappa", ])
## [1] 1.576
# geo_100.0
summary(geo_100.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.18    7.47    8.48    8.55    9.51   13.30
sd(geo_100.0_summary["kappa", ])
## [1] 1.41
# geo_500.0
summary(geo_500.0_summary["kappa", ])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.01    7.56    8.35    8.41    9.12   12.20
sd(geo_500.0_summary["kappa", ])
## [1] 1.281
geo.kappa.mean <- c(mean(geo_1.0_summary["kappa", ]), mean(geo_3.0_summary["kappa", ]), mean(geo_10.0_summary["kappa", ]),
                    mean(geo_50.0_summary["kappa", ]), mean(geo_100.0_summary["kappa", ]), 
                    mean(geo_500.0_summary["kappa", ]))
geo.kappa.sd <- c(sd(geo_1.0_summary["kappa", ]), sd(geo_3.0_summary["kappa", ]), sd(geo_10.0_summary["kappa", ]),
                  sd(geo_50.0_summary["kappa", ]), sd(geo_100.0_summary["kappa", ]), 
                  sd(geo_500.0_summary["kappa", ]))

plot(IGC.geo.list, geo.kappa.mean)
abline(h = 8.4043336)

plot of chunk unnamed-chunk-13

plot(IGC.geo.list, geo.kappa.sd)

plot of chunk unnamed-chunk-13

Now start to look at branch length estimates

Branch lengths used in simulation:

Branch blen
N0_N1: 0.0197240946542
N0_kluyveri 0.215682181791
N1_N2 0.20925129872
N1_castellii 0.171684721483
N2_N3 0.0257112589202
N2_bayanus 0.0266075664688
N3_N4 0.0321083243449
N3_kudriavzevii 0.0853588718458
N4_N5 0.024947887926
N4_mikatae 0.0566627496729
N5_cerevisiae 0.0581451177847
N5_paradoxus 0.0218788166581
# I am too lazy to change all these codes
IGC.geo.list <- IGC.geo.list[-1]
# N0_N1
IGC.N0.N1.mean.list <- c(mean(geo_3.0_summary["(N0,N1)", ]), mean(geo_10.0_summary["(N0,N1)", ]),
                         mean(geo_50.0_summary["(N0,N1)", ]), mean(geo_100.0_summary["(N0,N1)", ]),
                         mean(geo_500.0_summary["(N0,N1)", ]))
IGC.N0.N1.sd.list <- c(sd(geo_3.0_summary["(N0,N1)", ]), sd(geo_10.0_summary["(N0,N1)", ]),
                       sd(geo_50.0_summary["(N0,N1)", ]), sd(geo_100.0_summary["(N0,N1)", ]),
                       sd(geo_500.0_summary["(N0,N1)", ]))

PAML.N0.N1.paralog1.mean.list <- c(mean(PAML_3.0_summary["N1_N2", ]), 
                                   mean(PAML_10.0_summary["N1_N2", ]),
                                   mean(PAML_50.0_summary["N1_N2", ]),
                                   mean(PAML_100.0_summary["N1_N2", ]),
                                   mean(PAML_500.0_summary["N1_N2", ]))
PAML.N0.N1.paralog1.sd.list <- c(sd(PAML_3.0_summary["N1_N2", ]), 
                                 sd(PAML_10.0_summary["N1_N2", ]),
                                 sd(PAML_50.0_summary["N1_N2", ]),
                                 sd(PAML_100.0_summary["N1_N2", ]),
                                 sd(PAML_500.0_summary["N1_N2", ]))
PAML.N0.N1.paralog2.mean.list <- c(mean(PAML_3.0_summary["N1_N7", ]), 
                                   mean(PAML_10.0_summary["N1_N7", ]),
                                   mean(PAML_50.0_summary["N1_N7", ]),
                                   mean(PAML_100.0_summary["N1_N7", ]),
                                   mean(PAML_500.0_summary["N1_N7", ]))
PAML.N0.N1.paralog2.sd.list <- c(sd(PAML_3.0_summary["N1_N7", ]), 
                                 sd(PAML_10.0_summary["N1_N7", ]),
                                 sd(PAML_50.0_summary["N1_N7", ]),
                                 sd(PAML_100.0_summary["N1_N7", ]),
                                 sd(PAML_500.0_summary["N1_N7", ]))
# N0_kluyveri
IGC.N0.kluyveri.mean.list <- c(mean(geo_3.0_summary["(N0,kluyveri)", ]), mean(geo_10.0_summary["(N0,kluyveri)", ]),
                               mean(geo_50.0_summary["(N0,kluyveri)", ]), mean(geo_100.0_summary["(N0,kluyveri)", ]),
                               mean(geo_500.0_summary["(N0,kluyveri)", ]))
IGC.N0.kluyveri.sd.list <- c(sd(geo_3.0_summary["(N0,kluyveri)", ]), sd(geo_10.0_summary["(N0,kluyveri)", ]),
                             sd(geo_50.0_summary["(N0,kluyveri)", ]), sd(geo_100.0_summary["(N0,kluyveri)", ]),
                             sd(geo_500.0_summary["(N0,kluyveri)", ]))
PAML.N0.kluyveri.paralog1.mean.list <- c(mean(PAML_3.0_summary["N0_kluyveriYDR418W", ]), 
                                         mean(PAML_10.0_summary["N0_kluyveriYDR418W", ]),
                                         mean(PAML_50.0_summary["N0_kluyveriYDR418W", ]),
                                         mean(PAML_100.0_summary["N0_kluyveriYDR418W", ]),
                                         mean(PAML_500.0_summary["N0_kluyveriYDR418W", ]))
PAML.N0.kluyveri.paralog1.sd.list <- c(sd(PAML_3.0_summary["N0_kluyveriYDR418W", ]), 
                                       sd(PAML_10.0_summary["N0_kluyveriYDR418W", ]),
                                       sd(PAML_50.0_summary["N0_kluyveriYDR418W", ]),
                                       sd(PAML_100.0_summary["N0_kluyveriYDR418W", ]),
                                       sd(PAML_500.0_summary["N0_kluyveriYDR418W", ]))
# N1_N2
IGC.N1.N2.mean.list <- c(mean(geo_3.0_summary["(N1,N2)", ]), mean(geo_10.0_summary["(N1,N2)", ]),
                         mean(geo_50.0_summary["(N1,N2)", ]), mean(geo_100.0_summary["(N1,N2)", ]),
                         mean(geo_500.0_summary["(N1,N2)", ]))
IGC.N1.N2.sd.list <- c(sd(geo_3.0_summary["(N1,N2)", ]), sd(geo_10.0_summary["(N1,N2)", ]),
                       sd(geo_50.0_summary["(N1,N2)", ]), sd(geo_100.0_summary["(N1,N2)", ]),
                       sd(geo_500.0_summary["(N1,N2)", ]))

PAML.N1.N2.paralog1.mean.list <- c(mean(PAML_3.0_summary["N2_N3", ]), 
                                   mean(PAML_10.0_summary["N2_N3", ]),
                                   mean(PAML_50.0_summary["N2_N3", ]),
                                   mean(PAML_100.0_summary["N2_N3", ]),
                                   mean(PAML_500.0_summary["N2_N3", ]))
PAML.N1.N2.paralog1.sd.list <- c(sd(PAML_3.0_summary["N2_N3", ]), 
                                 sd(PAML_10.0_summary["N2_N3", ]),
                                 sd(PAML_50.0_summary["N2_N3", ]),
                                 sd(PAML_100.0_summary["N2_N3", ]),
                                 sd(PAML_500.0_summary["N2_N3", ]))
PAML.N1.N2.paralog2.mean.list <- c(mean(PAML_3.0_summary["N7_N8", ]), 
                                   mean(PAML_10.0_summary["N7_N8", ]),
                                   mean(PAML_50.0_summary["N7_N8", ]),
                                   mean(PAML_100.0_summary["N7_N8", ]),
                                   mean(PAML_500.0_summary["N7_N8", ]))
PAML.N1.N2.paralog2.sd.list <- c(sd(PAML_3.0_summary["N7_N8", ]), 
                                 sd(PAML_10.0_summary["N7_N8", ]),
                                 sd(PAML_50.0_summary["N7_N8", ]),
                                 sd(PAML_100.0_summary["N7_N8", ]),
                                 sd(PAML_500.0_summary["N7_N8", ]))

# N1_castellii
IGC.N1.castellii.mean.list <- c(mean(geo_3.0_summary["(N1,castellii)", ]), mean(geo_10.0_summary["(N1,castellii)", ]),
                                mean(geo_50.0_summary["(N1,castellii)", ]), mean(geo_100.0_summary["(N1,castellii)", ]),
                                mean(geo_500.0_summary["(N1,castellii)", ]))
IGC.N1.castellii.sd.list <- c(sd(geo_3.0_summary["(N1,castellii)", ]), sd(geo_10.0_summary["(N1,castellii)", ]),
                              sd(geo_50.0_summary["(N1,castellii)", ]), sd(geo_100.0_summary["(N1,castellii)", ]),
                              sd(geo_500.0_summary["(N1,castellii)", ]))

PAML.N1.castellii.paralog1.mean.list <- c(mean(PAML_3.0_summary["N2_castelliiYDR418W", ]), 
                                          mean(PAML_10.0_summary["N2_castelliiYDR418W", ]),
                                          mean(PAML_50.0_summary["N2_castelliiYDR418W", ]),
                                          mean(PAML_100.0_summary["N2_castelliiYDR418W", ]),
                                          mean(PAML_500.0_summary["N2_castelliiYDR418W", ]))
PAML.N1.castellii.paralog1.sd.list <- c(sd(PAML_3.0_summary["N2_castelliiYDR418W", ]), 
                                        sd(PAML_10.0_summary["N2_castelliiYDR418W", ]),
                                        sd(PAML_50.0_summary["N2_castelliiYDR418W", ]),
                                        sd(PAML_100.0_summary["N2_castelliiYDR418W", ]),
                                        sd(PAML_500.0_summary["N2_castelliiYDR418W", ]))

PAML.N1.castellii.paralog2.mean.list <- c(mean(PAML_3.0_summary["N7_castelliiYEL054C", ]), 
                                          mean(PAML_10.0_summary["N7_castelliiYEL054C", ]),
                                          mean(PAML_50.0_summary["N7_castelliiYEL054C", ]),
                                          mean(PAML_100.0_summary["N7_castelliiYEL054C", ]),
                                          mean(PAML_500.0_summary["N7_castelliiYEL054C", ]))
PAML.N1.castellii.paralog2.sd.list <- c(sd(PAML_3.0_summary["N7_castelliiYEL054C", ]), 
                                        sd(PAML_10.0_summary["N7_castelliiYEL054C", ]),
                                        sd(PAML_50.0_summary["N7_castelliiYEL054C", ]),
                                        sd(PAML_100.0_summary["N7_castelliiYEL054C", ]),
                                        sd(PAML_500.0_summary["N7_castelliiYEL054C", ]))
# N2_N3
IGC.N2.N3.mean.list <- c(mean(geo_3.0_summary["(N2,N3)", ]), mean(geo_10.0_summary["(N2,N3)", ]),
                         mean(geo_50.0_summary["(N2,N3)", ]), mean(geo_100.0_summary["(N2,N3)", ]),
                         mean(geo_500.0_summary["(N2,N3)", ]))
IGC.N2.N3.sd.list <- c(sd(geo_3.0_summary["(N2,N3)", ]), sd(geo_10.0_summary["(N2,N3)", ]),
                       sd(geo_50.0_summary["(N2,N3)", ]), sd(geo_100.0_summary["(N2,N3)", ]),
                       sd(geo_500.0_summary["(N2,N3)", ]))

PAML.N2.N3.paralog1.mean.list <- c(mean(PAML_3.0_summary["N3_N4", ]), 
                                   mean(PAML_10.0_summary["N3_N4", ]),
                                   mean(PAML_50.0_summary["N3_N4", ]),
                                   mean(PAML_100.0_summary["N3_N4", ]),
                                   mean(PAML_500.0_summary["N3_N4", ]))
PAML.N2.N3.paralog1.sd.list <- c(sd(PAML_3.0_summary["N3_N4", ]), 
                                 sd(PAML_10.0_summary["N3_N4", ]),
                                 sd(PAML_50.0_summary["N3_N4", ]),
                                 sd(PAML_100.0_summary["N3_N4", ]),
                                 sd(PAML_500.0_summary["N3_N4", ]))
PAML.N2.N3.paralog2.mean.list <- c(mean(PAML_3.0_summary["N8_N9", ]), 
                                   mean(PAML_10.0_summary["N8_N9", ]),
                                   mean(PAML_50.0_summary["N8_N9", ]),
                                   mean(PAML_100.0_summary["N8_N9", ]),
                                   mean(PAML_500.0_summary["N8_N9", ]))
PAML.N2.N3.paralog2.sd.list <- c(sd(PAML_3.0_summary["N8_N9", ]), 
                                 sd(PAML_10.0_summary["N8_N9", ]),
                                 sd(PAML_50.0_summary["N8_N9", ]),
                                 sd(PAML_100.0_summary["N8_N9", ]),
                                 sd(PAML_500.0_summary["N8_N9", ]))
# N2_bayanus
IGC.N2.bayanus.mean.list <- c(mean(geo_3.0_summary["(N2,bayanus)", ]), mean(geo_10.0_summary["(N2,bayanus)", ]),
                              mean(geo_50.0_summary["(N2,bayanus)", ]), mean(geo_100.0_summary["(N2,bayanus)", ]),
                              mean(geo_500.0_summary["(N2,bayanus)", ]))
IGC.N2.bayanus.sd.list <- c(sd(geo_3.0_summary["(N2,bayanus)", ]), sd(geo_10.0_summary["(N2,bayanus)", ]),
                            sd(geo_50.0_summary["(N2,bayanus)", ]), sd(geo_100.0_summary["(N2,bayanus)", ]),
                            sd(geo_500.0_summary["(N2,bayanus)", ]))

PAML.N2.bayanus.paralog1.mean.list <- c(mean(PAML_3.0_summary["N3_bayanusYDR418W", ]), 
                                        mean(PAML_10.0_summary["N3_bayanusYDR418W", ]),
                                        mean(PAML_50.0_summary["N3_bayanusYDR418W", ]),
                                        mean(PAML_100.0_summary["N3_bayanusYDR418W", ]),
                                        mean(PAML_500.0_summary["N3_bayanusYDR418W", ]))
PAML.N2.bayanus.paralog1.sd.list <- c(sd(PAML_3.0_summary["N3_bayanusYDR418W", ]), 
                                      sd(PAML_10.0_summary["N3_bayanusYDR418W", ]),
                                      sd(PAML_50.0_summary["N3_bayanusYDR418W", ]),
                                      sd(PAML_100.0_summary["N3_bayanusYDR418W", ]),
                                      sd(PAML_500.0_summary["N3_bayanusYDR418W", ]))
PAML.N2.bayanus.paralog2.mean.list <- c(mean(PAML_3.0_summary["N8_bayanusYEL054C", ]), 
                                        mean(PAML_10.0_summary["N8_bayanusYEL054C", ]),
                                        mean(PAML_50.0_summary["N8_bayanusYEL054C", ]),
                                        mean(PAML_100.0_summary["N8_bayanusYEL054C", ]),
                                        mean(PAML_500.0_summary["N8_bayanusYEL054C", ]))
PAML.N2.bayanus.paralog2.sd.list <- c(sd(PAML_3.0_summary["N8_bayanusYEL054C", ]), 
                                      sd(PAML_10.0_summary["N8_bayanusYEL054C", ]),
                                      sd(PAML_50.0_summary["N8_bayanusYEL054C", ]),
                                      sd(PAML_100.0_summary["N8_bayanusYEL054C", ]),
                                      sd(PAML_500.0_summary["N8_bayanusYEL054C", ]))

# N3_N4
IGC.N3.N4.mean.list <- c(mean(geo_3.0_summary["(N3,N4)", ]), mean(geo_10.0_summary["(N3,N4)", ]),
                         mean(geo_50.0_summary["(N3,N4)", ]), mean(geo_100.0_summary["(N3,N4)", ]),
                         mean(geo_500.0_summary["(N3,N4)", ]))
IGC.N3.N4.sd.list <- c(sd(geo_3.0_summary["(N3,N4)", ]), sd(geo_10.0_summary["(N3,N4)", ]),
                       sd(geo_50.0_summary["(N3,N4)", ]), sd(geo_100.0_summary["(N3,N4)", ]),
                       sd(geo_500.0_summary["(N3,N4)", ]))
PAML.N3.N4.paralog1.mean.list <- c(mean(PAML_3.0_summary["N4_N5", ]), 
                                   mean(PAML_10.0_summary["N4_N5", ]),
                                   mean(PAML_50.0_summary["N4_N5", ]),
                                   mean(PAML_100.0_summary["N4_N5", ]),
                                   mean(PAML_500.0_summary["N4_N5", ]))
PAML.N3.N4.paralog1.sd.list <- c(sd(PAML_3.0_summary["N4_N5", ]), 
                                 sd(PAML_10.0_summary["N4_N5", ]),
                                 sd(PAML_50.0_summary["N4_N5", ]),
                                 sd(PAML_100.0_summary["N4_N5", ]),
                                 sd(PAML_500.0_summary["N4_N5", ]))
PAML.N3.N4.paralog2.mean.list <- c(mean(PAML_3.0_summary["N9_N10", ]), 
                                   mean(PAML_10.0_summary["N9_N10", ]),
                                   mean(PAML_50.0_summary["N9_N10", ]),
                                   mean(PAML_100.0_summary["N9_N10", ]),
                                   mean(PAML_500.0_summary["N9_N10", ]))
PAML.N3.N4.paralog2.sd.list <- c(sd(PAML_3.0_summary["N9_N10", ]), 
                                 sd(PAML_10.0_summary["N9_N10", ]),
                                 sd(PAML_50.0_summary["N9_N10", ]),
                                 sd(PAML_100.0_summary["N9_N10", ]),
                                 sd(PAML_500.0_summary["N9_N10", ]))
# N3_kudriavzevii
IGC.N3.kudriavzevii.mean.list <- c(mean(geo_3.0_summary["(N3,kudriavzevii)", ]), mean(geo_10.0_summary["(N3,kudriavzevii)", ]),
                                   mean(geo_50.0_summary["(N3,kudriavzevii)", ]), mean(geo_100.0_summary["(N3,kudriavzevii)", ]),
                                   mean(geo_500.0_summary["(N3,kudriavzevii)", ]))
IGC.N3.kudriavzevii.sd.list <- c(sd(geo_3.0_summary["(N3,kudriavzevii)", ]), sd(geo_10.0_summary["(N3,kudriavzevii)", ]),
                                 sd(geo_50.0_summary["(N3,kudriavzevii)", ]), sd(geo_100.0_summary["(N3,kudriavzevii)", ]),
                                 sd(geo_500.0_summary["(N3,kudriavzevii)", ]))
PAML.N3.kudriavzevii.paralog1.mean.list <- c(mean(PAML_3.0_summary["N4_kudriavzeviiYDR418W", ]), 
                                             mean(PAML_10.0_summary["N4_kudriavzeviiYDR418W", ]),
                                             mean(PAML_50.0_summary["N4_kudriavzeviiYDR418W", ]),
                                             mean(PAML_100.0_summary["N4_kudriavzeviiYDR418W", ]),
                                             mean(PAML_500.0_summary["N4_kudriavzeviiYDR418W", ]))
PAML.N3.kudriavzevii.paralog1.sd.list <- c(sd(PAML_3.0_summary["N4_kudriavzeviiYDR418W", ]), 
                                           sd(PAML_10.0_summary["N4_kudriavzeviiYDR418W", ]),
                                           sd(PAML_50.0_summary["N4_kudriavzeviiYDR418W", ]),
                                           sd(PAML_100.0_summary["N4_kudriavzeviiYDR418W", ]),
                                           sd(PAML_500.0_summary["N4_kudriavzeviiYDR418W", ]))
PAML.N3.kudriavzevii.paralog2.mean.list <- c(mean(PAML_3.0_summary["N9_kudriavzeviiYEL054C", ]), 
                                             mean(PAML_10.0_summary["N9_kudriavzeviiYEL054C", ]),
                                             mean(PAML_50.0_summary["N9_kudriavzeviiYEL054C", ]),
                                             mean(PAML_100.0_summary["N9_kudriavzeviiYEL054C", ]),
                                             mean(PAML_500.0_summary["N9_kudriavzeviiYEL054C", ]))
PAML.N3.kudriavzevii.paralog2.sd.list <- c(sd(PAML_3.0_summary["N9_kudriavzeviiYEL054C", ]), 
                                           sd(PAML_10.0_summary["N9_kudriavzeviiYEL054C", ]),
                                           sd(PAML_50.0_summary["N9_kudriavzeviiYEL054C", ]),
                                           sd(PAML_100.0_summary["N9_kudriavzeviiYEL054C", ]),
                                           sd(PAML_500.0_summary["N9_kudriavzeviiYEL054C", ]))
# N4_N5
IGC.N4.N5.mean.list <- c(mean(geo_3.0_summary["(N4,N5)", ]), mean(geo_10.0_summary["(N4,N5)", ]),
                         mean(geo_50.0_summary["(N4,N5)", ]), mean(geo_100.0_summary["(N4,N5)", ]),
                         mean(geo_500.0_summary["(N4,N5)", ]))
IGC.N4.N5.sd.list <- c(sd(geo_3.0_summary["(N4,N5)", ]), sd(geo_10.0_summary["(N4,N5)", ]),
                       sd(geo_50.0_summary["(N4,N5)", ]), sd(geo_100.0_summary["(N4,N5)", ]),
                       sd(geo_500.0_summary["(N4,N5)", ]))
PAML.N4.N5.paralog1.mean.list <- c(mean(PAML_3.0_summary["N5_N6", ]), 
                                   mean(PAML_10.0_summary["N5_N6", ]),
                                   mean(PAML_50.0_summary["N5_N6", ]),
                                   mean(PAML_100.0_summary["N5_N6", ]),
                                   mean(PAML_500.0_summary["N5_N6", ]))
PAML.N4.N5.paralog1.sd.list <- c(sd(PAML_3.0_summary["N5_N6", ]), 
                                 sd(PAML_10.0_summary["N5_N6", ]),
                                 sd(PAML_50.0_summary["N5_N6", ]),
                                 sd(PAML_100.0_summary["N5_N6", ]),
                                 sd(PAML_500.0_summary["N5_N6", ]))
PAML.N4.N5.paralog2.mean.list <- c(mean(PAML_3.0_summary["N10_N11", ]), 
                                   mean(PAML_10.0_summary["N10_N11", ]),
                                   mean(PAML_50.0_summary["N10_N11", ]),
                                   mean(PAML_100.0_summary["N10_N11", ]),
                                   mean(PAML_500.0_summary["N10_N11", ]))
PAML.N4.N5.paralog2.sd.list <- c(sd(PAML_3.0_summary["N10_N11", ]), 
                                 sd(PAML_10.0_summary["N10_N11", ]),
                                 sd(PAML_50.0_summary["N10_N11", ]),
                                 sd(PAML_100.0_summary["N10_N11", ]),
                                 sd(PAML_500.0_summary["N10_N11", ]))
# N4_mikatae
IGC.N4.mikatae.mean.list <- c(mean(geo_3.0_summary["(N4,mikatae)", ]), mean(geo_10.0_summary["(N4,mikatae)", ]),
                              mean(geo_50.0_summary["(N4,mikatae)", ]), mean(geo_100.0_summary["(N4,mikatae)", ]),
                              mean(geo_500.0_summary["(N4,mikatae)", ]))
IGC.N4.mikatae.sd.list <- c(sd(geo_3.0_summary["(N4,mikatae)", ]), sd(geo_10.0_summary["(N4,mikatae)", ]),
                            sd(geo_50.0_summary["(N4,mikatae)", ]), sd(geo_100.0_summary["(N4,mikatae)", ]),
                            sd(geo_500.0_summary["(N4,mikatae)", ]))
PAML.N4.mikatae.paralog1.mean.list <- c(mean(PAML_3.0_summary["N5_mikataeYDR418W", ]), 
                                        mean(PAML_10.0_summary["N5_mikataeYDR418W", ]),
                                        mean(PAML_50.0_summary["N5_mikataeYDR418W", ]),
                                        mean(PAML_100.0_summary["N5_mikataeYDR418W", ]),
                                        mean(PAML_500.0_summary["N5_mikataeYDR418W", ]))
PAML.N4.mikatae.paralog1.sd.list <- c(sd(PAML_3.0_summary["N5_mikataeYDR418W", ]), 
                                      sd(PAML_10.0_summary["N5_mikataeYDR418W", ]),
                                      sd(PAML_50.0_summary["N5_mikataeYDR418W", ]),
                                      sd(PAML_100.0_summary["N5_mikataeYDR418W", ]),
                                      sd(PAML_500.0_summary["N5_mikataeYDR418W", ]))
PAML.N4.mikatae.paralog2.mean.list <- c(mean(PAML_3.0_summary["N10_mikataeYEL054C", ]), 
                                        mean(PAML_10.0_summary["N10_mikataeYEL054C", ]),
                                        mean(PAML_50.0_summary["N10_mikataeYEL054C", ]),
                                        mean(PAML_100.0_summary["N10_mikataeYEL054C", ]),
                                        mean(PAML_500.0_summary["N10_mikataeYEL054C", ]))
PAML.N4.mikatae.paralog2.sd.list <- c(sd(PAML_3.0_summary["N10_mikataeYEL054C", ]), 
                                      sd(PAML_10.0_summary["N10_mikataeYEL054C", ]),
                                      sd(PAML_50.0_summary["N10_mikataeYEL054C", ]),
                                      sd(PAML_100.0_summary["N10_mikataeYEL054C", ]),
                                      sd(PAML_500.0_summary["N10_mikataeYEL054C", ]))
# N5_cerevisiae
IGC.N5.cerevisiae.mean.list <- c(mean(geo_3.0_summary["(N5,cerevisiae)", ]), mean(geo_10.0_summary["(N5,cerevisiae)", ]),
                                 mean(geo_50.0_summary["(N5,cerevisiae)", ]), mean(geo_100.0_summary["(N5,cerevisiae)", ]),
                                 mean(geo_500.0_summary["(N5,cerevisiae)", ]))
IGC.N5.cerevisiae.sd.list <- c(sd(geo_3.0_summary["(N5,cerevisiae)", ]), sd(geo_10.0_summary["(N5,cerevisiae)", ]),
                               sd(geo_50.0_summary["(N5,cerevisiae)", ]), sd(geo_100.0_summary["(N5,cerevisiae)", ]),
                               sd(geo_500.0_summary["(N5,cerevisiae)", ]))
PAML.N5.cerevisiae.paralog1.mean.list <- c(mean(PAML_3.0_summary["N6_cerevisiaeYDR418W", ]), 
                                           mean(PAML_10.0_summary["N6_cerevisiaeYDR418W", ]),
                                           mean(PAML_50.0_summary["N6_cerevisiaeYDR418W", ]),
                                           mean(PAML_100.0_summary["N6_cerevisiaeYDR418W", ]),
                                           mean(PAML_500.0_summary["N6_cerevisiaeYDR418W", ]))
PAML.N5.cerevisiae.paralog1.sd.list <- c(sd(PAML_3.0_summary["N6_cerevisiaeYDR418W", ]), 
                                         sd(PAML_10.0_summary["N6_cerevisiaeYDR418W", ]),
                                         sd(PAML_50.0_summary["N6_cerevisiaeYDR418W", ]),
                                         sd(PAML_100.0_summary["N6_cerevisiaeYDR418W", ]),
                                         sd(PAML_500.0_summary["N6_cerevisiaeYDR418W", ]))
PAML.N5.cerevisiae.paralog2.mean.list <- c(mean(PAML_3.0_summary["N11_cerevisiaeYEL054C", ]), 
                                           mean(PAML_10.0_summary["N11_cerevisiaeYEL054C", ]),
                                           mean(PAML_50.0_summary["N11_cerevisiaeYEL054C", ]),
                                           mean(PAML_100.0_summary["N11_cerevisiaeYEL054C", ]),
                                           mean(PAML_500.0_summary["N11_cerevisiaeYEL054C", ]))
PAML.N5.cerevisiae.paralog2.sd.list <- c(sd(PAML_3.0_summary["N11_cerevisiaeYEL054C", ]), 
                                         sd(PAML_10.0_summary["N11_cerevisiaeYEL054C", ]),
                                         sd(PAML_50.0_summary["N11_cerevisiaeYEL054C", ]),
                                         sd(PAML_100.0_summary["N11_cerevisiaeYEL054C", ]),
                                         sd(PAML_500.0_summary["N11_cerevisiaeYEL054C", ]))
# N5_paradoxus
IGC.N5.paradoxus.mean.list <- c(mean(geo_3.0_summary["(N5,paradoxus)", ]), mean(geo_10.0_summary["(N5,paradoxus)", ]),
                                mean(geo_50.0_summary["(N5,paradoxus)", ]), mean(geo_100.0_summary["(N5,paradoxus)", ]),
                                mean(geo_500.0_summary["(N5,paradoxus)", ]))
IGC.N5.paradoxus.sd.list <- c(sd(geo_3.0_summary["(N5,paradoxus)", ]), sd(geo_10.0_summary["(N5,paradoxus)", ]),
                              sd(geo_50.0_summary["(N5,paradoxus)", ]), sd(geo_100.0_summary["(N5,paradoxus)", ]),
                              sd(geo_500.0_summary["(N5,paradoxus)", ]))
PAML.N5.paradoxus.paralog1.mean.list <- c(mean(PAML_3.0_summary["N6_paradoxusYDR418W", ]), 
                                          mean(PAML_10.0_summary["N6_paradoxusYDR418W", ]),
                                          mean(PAML_50.0_summary["N6_paradoxusYDR418W", ]),
                                          mean(PAML_100.0_summary["N6_paradoxusYDR418W", ]),
                                          mean(PAML_500.0_summary["N6_paradoxusYDR418W", ]))
PAML.N5.paradoxus.paralog1.sd.list <- c(sd(PAML_3.0_summary["N6_paradoxusYDR418W", ]), 
                                        sd(PAML_10.0_summary["N6_paradoxusYDR418W", ]),
                                        sd(PAML_50.0_summary["N6_paradoxusYDR418W", ]),
                                        sd(PAML_100.0_summary["N6_paradoxusYDR418W", ]),
                                        sd(PAML_500.0_summary["N6_paradoxusYDR418W", ]))
PAML.N5.paradoxus.paralog2.mean.list <- c(mean(PAML_3.0_summary["N11_paradoxusYEL054C", ]), 
                                          mean(PAML_10.0_summary["N11_paradoxusYEL054C", ]),
                                          mean(PAML_50.0_summary["N11_paradoxusYEL054C", ]),
                                          mean(PAML_100.0_summary["N11_paradoxusYEL054C", ]),
                                          mean(PAML_500.0_summary["N11_paradoxusYEL054C", ]))
PAML.N5.paradoxus.paralog2.sd.list <- c(sd(PAML_3.0_summary["N11_paradoxusYEL054C", ]), 
                                        sd(PAML_10.0_summary["N11_paradoxusYEL054C", ]),
                                        sd(PAML_50.0_summary["N11_paradoxusYEL054C", ]),
                                        sd(PAML_100.0_summary["N11_paradoxusYEL054C", ]),
                                        sd(PAML_500.0_summary["N11_paradoxusYEL054C", ]))

Now plot all these tree branch length estimates from IGC + MG94 model

# N0_N1
plot(IGC.geo.list, IGC.N0.N1.mean.list)
abline(h = 0.0197240946542)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N0.N1.sd.list)

plot of chunk unnamed-chunk-15

# N0_kluyveri
plot(IGC.geo.list, IGC.N0.kluyveri.mean.list)
abline(h = 0.215682181791)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N0.kluyveri.sd.list)

plot of chunk unnamed-chunk-15

# N1_N2
plot(IGC.geo.list, IGC.N1.N2.mean.list)
abline(h = 0.20925129872)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N1.N2.sd.list)

plot of chunk unnamed-chunk-15

# N1_castellii
plot(IGC.geo.list, IGC.N1.castellii.mean.list)
abline(h = 0.171684721483)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N1.castellii.sd.list)

plot of chunk unnamed-chunk-15

# N2_N3
plot(IGC.geo.list, IGC.N2.N3.mean.list)
abline(h = 0.0257112589202)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N2.N3.sd.list)

plot of chunk unnamed-chunk-15

# N2_bayanus
plot(IGC.geo.list, IGC.N2.bayanus.mean.list)
abline(h = 0.0266075664688)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N2.bayanus.sd.list)

plot of chunk unnamed-chunk-15

# N3_N4
plot(IGC.geo.list, IGC.N3.N4.mean.list)
abline(h = 0.0321083243449)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N3.N4.sd.list)

plot of chunk unnamed-chunk-15

# N3_kudriavzevii
plot(IGC.geo.list, IGC.N3.kudriavzevii.mean.list)
abline(h = 0.0853588718458)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N3.kudriavzevii.sd.list)

plot of chunk unnamed-chunk-15

# N4_N5
plot(IGC.geo.list, IGC.N4.N5.mean.list)
abline(h = 0.024947887926)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N4.N5.sd.list)

plot of chunk unnamed-chunk-15

# N4_mikatae
plot(IGC.geo.list, IGC.N4.mikatae.mean.list)
abline(h = 0.0566627496729)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N4.mikatae.sd.list)

plot of chunk unnamed-chunk-15

# N5_cerevisiae
plot(IGC.geo.list, IGC.N5.cerevisiae.mean.list)
abline(h = 0.0581451177847)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N5.cerevisiae.sd.list)

plot of chunk unnamed-chunk-15

# N5_paradoxus
plot(IGC.geo.list, IGC.N5.paradoxus.mean.list)
abline(h = 0.0218788166581)

plot of chunk unnamed-chunk-15

plot(IGC.geo.list, IGC.N5.paradoxus.sd.list)

plot of chunk unnamed-chunk-15

Now plot same branch length estimates from each paralog in paml results

# N0_N1
matplot(IGC.geo.list, cbind(PAML.N0.N1.paralog1.mean.list, PAML.N0.N1.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N0.N1 estimate")
abline(h = 0.0197240946542)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N0.N1.paralog1.sd.list, PAML.N0.N1.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N0.N1 estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N0_kluyveri
plot(IGC.geo.list, PAML.N0.kluyveri.paralog1.mean.list)
abline(h = 0.215682181791)

plot of chunk unnamed-chunk-16

plot(IGC.geo.list, PAML.N0.kluyveri.paralog1.sd.list)

plot of chunk unnamed-chunk-16

# N1_N2
matplot(IGC.geo.list, cbind(PAML.N1.N2.paralog1.mean.list, PAML.N1.N2.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N1.N2 estimate")
abline(h = 0.20925129872)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N1.N2.paralog1.sd.list, PAML.N1.N2.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N1.N2 estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N1_castellii
matplot(IGC.geo.list, cbind(PAML.N1.castellii.paralog1.mean.list, PAML.N1.castellii.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N1.castellii estimate")
abline(h = 0.171684721483)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N1.castellii.paralog1.sd.list, PAML.N1.castellii.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N1.castellii estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N2_N3
matplot(IGC.geo.list, cbind(PAML.N2.N3.paralog1.mean.list, PAML.N2.N3.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N2.N3 estimate")
abline(h = 0.0257112589202)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N2.N3.paralog1.sd.list, PAML.N2.N3.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N2.N3 estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N2_bayanus
matplot(IGC.geo.list, cbind(PAML.N2.bayanus.paralog1.mean.list, PAML.N2.bayanus.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N2.bayanus estimate")
abline(h = 0.0266075664688)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N2.bayanus.paralog1.sd.list, PAML.N2.bayanus.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N2.bayanus estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N3_N4
matplot(IGC.geo.list, cbind(PAML.N3.N4.paralog1.mean.list, PAML.N3.N4.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N3.N4 estimate")
abline(h = 0.0321083243449)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N3.N4.paralog1.sd.list, PAML.N3.N4.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N3.N4 estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N3_kudriavzevii
matplot(IGC.geo.list, cbind(PAML.N3.kudriavzevii.paralog1.mean.list, PAML.N3.kudriavzevii.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N3.kudriavzevii estimate")
abline(h = 0.0853588718458)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N3.kudriavzevii.paralog1.sd.list, PAML.N3.kudriavzevii.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N3.kudriavzevii estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N4_N5
matplot(IGC.geo.list, cbind(PAML.N4.N5.paralog1.mean.list, PAML.N4.N5.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N4.N5 estimate")
abline(h = 0.024947887926)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N4.N5.paralog1.sd.list, PAML.N4.N5.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N4.N5 estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N4_mikatae
matplot(IGC.geo.list, cbind(PAML.N4.N5.paralog1.mean.list, PAML.N4.mikatae.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N4.mikatae estimate")
abline(h = 0.0566627496729)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N4.mikatae.paralog1.sd.list, PAML.N4.mikatae.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N4.mikatae estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N5_cerevisiae
matplot(IGC.geo.list, cbind(PAML.N5.cerevisiae.paralog1.mean.list, PAML.N5.cerevisiae.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N5.cerevisiae estimate")
abline(h = 0.0581451177847)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N5.cerevisiae.paralog1.sd.list, PAML.N5.cerevisiae.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N5.cerevisiae estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

# N5_paradoxus
matplot(IGC.geo.list, cbind(PAML.N5.paradoxus.paralog1.mean.list, PAML.N5.paradoxus.paralog2.mean.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "mean N5.paradoxus estimate")
abline(h = 0.0218788166581)
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16

matplot(IGC.geo.list, cbind(PAML.N5.paradoxus.paralog1.sd.list, PAML.N5.paradoxus.paralog2.sd.list),
        type = c("p"), pch = 1, col = 1:2, ylab = "sd N5.paradoxus estimate")
legend("right", legend = c("paralog1", "paralog2"), col = 1:2, pch = 1)

plot of chunk unnamed-chunk-16